Shortcuts: multiline comment/uncomment: 'ctrl' + '/'
import matplotlib.pyplot as plt
plt.plot([1,2,3,4,5],[4,5,6,7,8])
# both x,y should have same length
# print length to check both
plt.show()
x = [1,2,3,4,5]
y = [4,5,6,7,8]
plt.plot(x,y)
plt.xlabel('Plot x axix')
plt.ylabel('Plot y axis')
plt.title('Epic Graph') # Can use \n(new line) to fit the title
plt.show()
x = [1,2,3,4,5]
y1 = [4,5,6,7,8]
y2 = [5,3,2,1,0]
plt.plot(x,y1, label='Initial Line')
plt.plot(x,y2, label='Final Line')
plt.xlabel('Plot x axix')
plt.ylabel('Plot y axis')
plt.title('Epic Graph') # Can use \n(new line) to fit the title
plt.legend()
plt.show()
x1 = [1,2,3,4,5]
x2 = [2,4,6,8,10]
y1 = [4,5,6,7,8]
y2 = [5,3,2,1,0]
plt.bar(x1,y1,label='one')
plt.bar(x2,y2,label='Second')
plt.xlabel('Plot x axix')
plt.ylabel('Plot y axis')
plt.title('Epic Graph') # Can use \n(new line) to fit the title
plt.legend()
plt.show()
test_score = [55,45,60,78,98,75,43,67,89,95,96,99,78,87,93,72]
x = [x for x in range(len(test_score))]
# plt.bar(x,test_score)
# plt.show()
bins = [10,20,30,40,50,60,70,80,90,100]
plt.hist(test_score,bins,histtype='bar',rwidth=0.8)
plt.show()
bins = [10,20,30,40,50,60,70,80,90,100]
plt.hist(test_score,bins,histtype='bar',cumulative=True,rwidth=0.8)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
# Fixing random state for reproducibility
np.random.seed(19680801)
mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)
# the histogram of the data
n, bins, patches = plt.hist(x, 50, density=True, facecolor='g', alpha=0.75)
plt.xlabel('Smarts')
plt.ylabel('Probability')
plt.title('Histogram of IQ')
plt.text(60, .025, r'$\mu=100,\ \sigma=15$')# prints mu and sigma, adding text
plt.xlim(40, 160)
plt.ylim(0, 0.03)
plt.grid(True)
plt.show()
test_score_1 = [55,45,60,78,98,75,43,67,89,95,96,99,78,87,93,72]
test_score_2 = [19,11,23,45,67,89,90,34,56,67,65,54,34,23,56,78]
time_spent = [11,10,15,45,67,80,23,34,25,16,19,20,23,45,16,19]
plt.scatter(test_score_1,time_spent,marker='o',color='m')
plt.scatter(test_score_2,time_spent,marker='p',color='g')
plt.show()
year=[1,2,3,4,5,6,7,8,9,10]
taxes=[17,18,40,43,44,8,43,32,39,30]
overhead=[30,22,9,29,17,12,14,24,49,35]
entertainment=[41,32,27,13,19,12,22,18,28,20]
plt.plot([],[],color='m',label='Taxes')
plt.plot([],[],color='c',label='Overhead')
plt.plot([],[],color='g',label='Entertainment')
plt.stackplot(year,taxes,overhead,entertainment,colors=['m','c','g'])
plt.legend()
plt.show()
labels='Taxes','Overhead','Entertainment'
size=[25,32,12]
colors=['c','m','b']
plt.pie(size, labels=labels, colors=colors, startangle=0, autopct='%1.1f%%', explode=(0,0.1,0), shadow=True)
plt.axis('equal')
plt.show()
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import urllib
import numpy as np
def graph_data(stock):
    """Print the Yahoo chart-API CSV URL for a ticker symbol.

    Only the URL is built and printed here; nothing is downloaded.

    Args:
        stock: Ticker symbol (for example ``'AAPL'``) placed into the URL path.
    """
    # Local import: the file-level ``import urllib`` alone does not guarantee
    # that the ``urllib.parse`` submodule has been loaded.
    from urllib.parse import quote

    print('Currently pulling:', stock)
    # The ticker is typed in by the user, so percent-encode it; otherwise a
    # space, '/' or ';' in the input would corrupt the URL path.
    url = ('http://chartapi.finance.yahoo.com/instrument/1.0/'
           + quote(stock, safe='')
           + '/chartdata;type=quote;range=10y/csv')
    print(url)
stock = input('Stock to plot:')
graph_data(stock)
import pandas as pd
df = pd.read_csv(r"C:\Users\saurabhkumar9\Desktop\Python Training\Forecasting\4. Udemy - Python for Time Series Data Analysis\Data\starbucks.csv",
index_col='Date', parse_dates=True)
df.head()
title='Starbucks Closing Stock Prices'
ylabel='Closing Price (USD)'
xlabel='Closing Date'
ax = df['Close']['2017-01-01':'2017-03-01'].plot(figsize=(12,6),title=title,color='r',ls='--')
ax.grid(True, linestyle='--',color='g');
#ax.fill_between(df['Date'],df['Close'],55,where=(df['Close']<55),facecolor='g',alpha=0.5)
ax.autoscale(axis='x',tight=True);
ax.xaxis.label.set_color('b')
ax.yaxis.label.set_color('b')
ax.set(xlabel=xlabel, ylabel=ylabel);
Skipped annotation, animation, etc.
import random
import matplotlib.pyplot as ptl
fig = plt.figure(figsize=(10,6)) # first define figure
def create_plot(n_points=10, y_max=8):
    """Return sample data for one subplot.

    Args:
        n_points: Number of (x, y) points to generate. Defaults to 10.
        y_max: Exclusive upper bound of the random y values, which are drawn
            with ``random.randrange``. Defaults to 8.

    Returns:
        A tuple ``(xs, ys)`` of two equal-length lists, where ``xs`` is
        ``0 .. n_points - 1`` and ``ys`` holds the random integers.
    """
    xs = list(range(n_points))
    # One randrange() call per point, in x order, so a seeded generator
    # produces the same series as an explicit append loop would.
    ys = [random.randrange(y_max) for _ in xs]
    return xs, ys
# Place four axes in cells 1-4 of a 4-row x 2-column grid.
# Signature reminder: add_subplot(nrows, ncols, index, **kwargs).
ax1=fig.add_subplot(4,2,1) # grid 4x2, index 1
ax2=fig.add_subplot(4,2,2) # grid 4x2, index 2
ax3=fig.add_subplot(4,2,3) # grid 4x2, index 3
ax4=fig.add_subplot(4,2,4) # grid 4x2, index 4
# Each axes gets its own freshly generated (x, y) series.
x,y = create_plot() # data for ax1
ax1.plot(x,y)
x,y = create_plot() # data for ax2
ax2.plot(x,y)
x,y = create_plot() # data for ax3
ax3.plot(x,y)
x,y = create_plot() # data for ax4
ax4.plot(x,y)
plt.show()
import random
import matplotlib.pyplot as ptl
fig = plt.figure(figsize=(10,6)) # first define figure
def create_plot(n_points=10, y_max=8):
    """Return sample data for one subplot.

    Args:
        n_points: Number of (x, y) points to generate. Defaults to 10.
        y_max: Exclusive upper bound of the random y values, which are drawn
            with ``random.randrange``. Defaults to 8.

    Returns:
        A tuple ``(xs, ys)`` of two equal-length lists, where ``xs`` is
        ``0 .. n_points - 1`` and ``ys`` holds the random integers.
    """
    xs = list(range(n_points))
    # One randrange() call per point, in x order, so a seeded generator
    # produces the same series as an explicit append loop would.
    ys = [random.randrange(y_max) for _ in xs]
    return xs, ys
# Mix two grid shapes on one figure: two full-width axes taken from a
# 5-row x 1-column grid, then three axes taken from a 5-row x 3-column grid.
# Signature reminder: add_subplot(nrows, ncols, index, **kwargs).
ax1=fig.add_subplot(5,1,1) # grid 5x1, index 1
ax2=fig.add_subplot(5,1,2) # grid 5x1, index 2
ax3=fig.add_subplot(5,3,7) # grid 5x3, index 7
ax4=fig.add_subplot(5,3,8) # grid 5x3, index 8
ax5=fig.add_subplot(5,3,9) # grid 5x3, index 9
# Each axes gets its own freshly generated (x, y) series.
x,y = create_plot() # data for ax1
ax1.plot(x,y)
x,y = create_plot() # data for ax2
ax2.plot(x,y)
x,y = create_plot() # data for ax3
ax3.plot(x,y)
x,y = create_plot() # data for ax4
ax4.plot(x,y)
x,y = create_plot() # data for ax5
ax5.plot(x,y)
plt.show()
import random
import matplotlib.pyplot as ptl
fig = plt.figure(figsize=(10,6)) # first define figure
def create_plot(n_points=10, y_max=8):
    """Return sample data for one subplot.

    Args:
        n_points: Number of (x, y) points to generate. Defaults to 10.
        y_max: Exclusive upper bound of the random y values, which are drawn
            with ``random.randrange``. Defaults to 8.

    Returns:
        A tuple ``(xs, ys)`` of two equal-length lists, where ``xs`` is
        ``0 .. n_points - 1`` and ``ys`` holds the random integers.
    """
    xs = list(range(n_points))
    # One randrange() call per point, in x order, so a seeded generator
    # produces the same series as an explicit append loop would.
    ys = [random.randrange(y_max) for _ in xs]
    return xs, ys
# Three stacked axes on a 6-row x 1-column grid:
# a 1-row header, a 4-row main panel and a 1-row footer.
ax1 = plt.subplot2grid((6, 1), (0, 0), rowspan=1, colspan=1)
ax2 = plt.subplot2grid((6, 1), (1, 0), rowspan=4, colspan=1)
# Only row 5 remains below the main panel, so the footer spans one row.
# (Asking for rowspan=4 from row 5 would run past the end of the 6-row grid.)
ax3 = plt.subplot2grid((6, 1), (5, 0), rowspan=1, colspan=1)

# Each axes gets its own freshly generated (x, y) series.
for ax in (ax1, ax2, ax3):
    x, y = create_plot()
    ax.plot(x, y)
plt.show()
import plotly
plotly.__version__
#plotly.offline doesn't push your charts to the clouds
import plotly.offline as pyo
#allows us to create the Data and Figure objects
from plotly.graph_objs import *
#plotly.plotly pushes your charts to the cloud
import chart_studio.plotly.plotly as py
#pandas is a data analysis library
import pandas as pd
from pandas import DataFrame
pyo.offline.init_notebook_mode() # run at the start of every ipython
At a high level, the structure of the chart could be represented like this:
Figure = {'layout' : {<information about the chart's layout>}, 'data' : [{trace1}, {trace2}, {trace3}] }
Traces are a list of dictionaries, each of which contains the data for one series; the layout is a dictionary which contains information about the chart as a whole.
trace1 = {'type' : 'scatter',
'x' : [0,1,2,3,4,5,6,7,8,9],
'y' : [0,1,2,3,4,5,6,7,8,9],
'name' : 'trace1',
'mode' : 'lines'}
data = Data([trace1])
layout = {'title' : "My first plotly line chart",
'xaxis' : {'title' : 'X Values'},
'yaxis' : {'title' : 'Y Values'}}
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
Python's native dict.update() helps in updating the traces or layout
trace1
updatedY = {'y' : [0,3,7,2,6,9,1,4,5,8]}
trace1.update(updatedY)
trace1
layout
layout.update({'title' : 'My second Plotly line chart'})
layout
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
# We can create multiple charts by changing the trace
expenseData = pd.read_csv("http://richard-muir.com/data/public/csv/NumberOfMPsExpenseClaims_2010-2015.csv")
expenseData.head()
trace1 = {'type' : 'scatter',
'mode' : 'lines',
'name' : 'trace1',
'x' : expenseData['month'],
'y' : expenseData['NumberOfClaims2010']}
data = Data([trace1])
layout = {'title' : 'Expenses by month in 2010',
'xaxis' : {'title' : 'Month'},
'yaxis' : {'title' : 'Number of Claims'}}
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
trace_2010 = {'type' : 'scatter',
'x' : expenseData['month'],
'y' : expenseData['NumberOfClaims2010'],
'name' : '2010',
'mode' : 'lines'}
trace_2011 = {'type' : 'scatter',
'x' : expenseData['month'],
'y' : expenseData['NumberOfClaims2011'],
'name' : '2011',
'mode' : 'lines'}
trace_2012 = {'type' : 'scatter',
'x' : expenseData['month'],
'y' : expenseData['NumberOfClaims2012'],
'name' : '2012',
'mode' : 'lines'}
data = Data([trace_2010, trace_2011, trace_2012])
layout = {'title' : 'Expense claims by month for 2010 - 2012',
'xaxis' : {'title' : 'Month'},
'yaxis' : {'title' : 'Number of expense claims'}}
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
# Build one line trace per year (2010-2015); each reads that year's
# 'NumberOfClaims<year>' column and is named after the year.
traces = [{'type' : 'scatter',
           'x' : expenseData['month'],
           'y' : expenseData['NumberOfClaims' + str(yr)],
           'name' : yr,
           'mode' : 'lines'}
          for yr in range(2010, 2016)]
traces  # notebook cell output: inspect the generated traces
data = Data(traces)
layout = {'title' : 'Expense claims by month for 2010-2015',
          'xaxis' : {'title' : 'month'},
          'yaxis' : {'title' : 'Number of expense claims'}}
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
for col in expenseData.columns.tolist():
print(col)
for col in expenseData.columns.tolist():
if col != 'month':
print(col)
# Build one line trace per data column: every column except 'month'
# (which supplies the x values) becomes a trace named after the column.
traces = [{'type' : 'scatter',
           'x' : expenseData['month'],
           'y' : expenseData[col],
           'name' : col,
           'mode' : 'lines'}
          for col in expenseData.columns.tolist()
          if col != 'month']
traces  # notebook cell output: inspect the generated traces
layout = {'title' : 'Expense claims by month for 2010-2015',
          'xaxis' : {'title' : 'month'},
          'yaxis' : {'title' : 'Number of expense claims'}}
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
Plotly understands the standard CSS colour names, HEX codes or rgba codes. I'll use a combination of the CSS Colour Names and rgba in this course. The CSS colour names are really easy and intuitive to use and are helpful for quick examples, whilst using rgba allows us to set the opacity of a colour, as well as providing very fine control over the shade and hue.
To change the colour of a trace we need to add a 'marker' key to the trace. The value associated with the 'marker' key will be a dictionary which can contain a 'color' key:
trace = {'type' : 'scatter', 'marker' : {'color' : <CSS Color Name/HEX Code/RGB code/RGBA code/Variable>}
traces = []
for i in range(2010, 2016):
    # Highlight 2015 in red; every other year is drawn in grey.
    colour = 'Red' if i == 2015 else 'Grey'
    traces.append({'type' : 'scatter',
                   'x' : expenseData['month'],
                   'y' : expenseData['NumberOfClaims' + str(i)],
                   'name' : i,
                   'marker' : {'color' : colour},
                   'mode' : 'lines'})
data = Data(traces)
# Reuses the `layout` dictionary defined earlier in the file.
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
Alternatively, we can define a function to return the colour which we want.
def chooseColour(yr):
    """Return the trace colour for a year.

    Args:
        yr: The year a trace represents.

    Returns:
        ``'Blue'`` for 2015 (the highlighted year), ``'Grey'`` for any other value.
    """
    return 'Blue' if yr == 2015 else 'Grey'
traces = []
for i in range(2010, 2016):
traces.append({'type' : 'scatter',
'x' : expenseData['month'],
'y' : expenseData['NumberOfClaims' + str(i)],
'name' : i,
'marker' : {'color' : chooseColour(i)},
'mode' : 'lines'})
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
trace = {'type' : 'scatter', 'line' : {'width' : <Float/Integer/Variable>}
Just like color we can create variable or function to do this.
traces = []
for i in range(2010, 2016):
    # Highlight 2015 with a thicker red line; other years are thin and grey.
    if i == 2015:
        width, colour = 2, 'Red'
    else:
        width, colour = 1, 'Grey'
    traces.append({'type' : 'scatter',
                   'x' : expenseData['month'],
                   'y' : expenseData['NumberOfClaims' + str(i)],
                   'name' : i,
                   'line' : {'width' : width},
                   'marker' : {'color' : colour},
                   'mode' : 'lines'})
data = Data(traces)
# Reuses the `layout` dictionary defined earlier in the file.
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
Solidity of a line can be changed by using 'dash' option in the 'line' dictionary in the trace:
trace = {'type' : 'scatter', 'line' : {'dash' : <Dash string/Dash length in pixels/Variable>}
Some valid dash strings are: 'solid' 'dash' 'dot' 'dashdot'
The enumerate() creates a variable which holds the value of each item's index in the list in addition to the variable which holds the actual value of the variable. This value starts at 0 for the first item and increments by one for each subsequent item.
dashes = ['dash', 'dot', 'dashdot']

# Show the (index, year) pairs that enumerate() produces.
for i, yr in enumerate(range(2010, 2016)):
    print(i, yr)

# Integer-dividing the index by 2 maps the six years onto the three dash
# styles, two consecutive years per style.
for i, yr in enumerate(range(2010, 2016)):
    print(dashes[(i//2)])

traces = []
for i, yr in enumerate(range(2010, 2016)):
    traces.append({'type' : 'scatter',
                   'x' : expenseData['month'],
                   'y' : expenseData['NumberOfClaims' + str(yr)],
                   'line' : {'dash' : dashes[i//2]},
                   'name' : yr,
                   'mode' : 'lines'})
layout = {'title' : 'Expense claims by month for 2010-2015',
          'xaxis' : {'title' : 'month'},
          'yaxis' : {'title' : 'Number of expense claims'}}
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
We used the 'marker' sub-dictionary to change the colour of a trace. To change the marker symbol, add a 'symbol' key to this dictionary:
trace = {'type' : 'scatter', 'marker' : {'symbol' : <Marker symbol string/Variable>}
We need to tell Plotly that we want to use markers in addition to lines by changing the value for 'mode' from 'lines' to 'lines+markers':
trace = {'type' : 'scatter', 'mode' : 'lines+markers'}
# One marker symbol per year, matched by position to range(2010, 2016).
markerSymbols = ['circle','square','diamond','x','triangle-up','cross']
traces = []
for i, yr in enumerate(range(2010, 2016)):
    traces.append({'type' : 'scatter',
                   'x' : expenseData['month'],
                   'y' : expenseData['NumberOfClaims' + str(yr)],
                   'marker' : {'symbol' : markerSymbols[i]},
                   'name' : yr,
                   # 'lines+markers' draws the symbols on top of the line.
                   'mode' : 'lines+markers'})
data = Data(traces)
# Reuses the `layout` dictionary defined earlier in the file.
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
# With Open Option
markerSymbolsOpen = ['circle-open','square-open','diamond-open','x-open','triangle-up-open','cross-open']
traces = []
for i, yr in enumerate(range(2010, 2016)):
traces.append({'type' : 'scatter',
'x' : expenseData['month'],
'y' : expenseData['NumberOfClaims' + str(yr)],
'marker' : {'symbol' : markerSymbolsOpen[i]},
'name' : yr,
'mode' : 'lines+markers'})
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
# Change Size
markerSymbolsDot = ['circle-dot','square-dot','diamond-dot','x-dot','triangle-up-dot','cross-dot']
traces = []
for i, yr in enumerate(range(2010, 2016)):
traces.append({'type' : 'scatter',
'x' : expenseData['month'],
'y' : expenseData['NumberOfClaims' + str(yr)],
'marker' : {'symbol' : markerSymbolsDot[i], 'size' : 10, 'line' : {'width' : 1}},
'name' : yr,
'mode' : 'markers'})
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
markerSymbolsOpenDot = ['circle-open-dot','square-open-dot','diamond-open-dot','x-open-dot','triangle-up-open-dot','cross-open-dot']
traces = []
for i, yr in enumerate(range(2010, 2016)):
traces.append({'type' : 'scatter',
'x' : expenseData['month'],
'y' : expenseData['NumberOfClaims' + str(yr)],
'marker' : {'symbol' : markerSymbolsOpenDot[i], 'size' : 10, 'line' : {'width' : 1}},
'name' : yr,
'mode' : 'markers'})
data = Data(traces)
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
Before applying smoothing to a line chart, consider very carefully why you are doing it and what information you may lose by doing so. Essentially, smoothing plots data which does not actually exist.
To smooth a line, we need to set the 'shape' option to 'spline' and pass a float value between 0 and 1.3 to the 'smoothing' option. Both of these options are inside the 'line' dictionary. Smoothing only works on scatterplots.
trace = {'type' : 'scatter', 'line' : {'smoothing' : <float between 0 and 1.3>, 'shape' : 'spline'}}
xVals = [1,2,3,4,5,6,7,8,9,10]
yVals = [11,15,8,21,14,28,19,10,5,20]
unSmoothTrace = {'type' : 'scatter',
'x' : xVals,
'y' : yVals,
'mode' : 'lines',
'line' : {'smoothing' : 0,
'shape' : 'spline'},
'name' : 'UnSmooth'}
smoothTrace = {'type' : 'scatter',
'x' : xVals,
'y' : yVals,
'mode' : 'lines',
'line' : {'smoothing' : 1.3,
'shape' : 'spline'},
'name' : 'Smooth'}
data = Data([unSmoothTrace, smoothTrace])
pyo.iplot(data)
updateSmoothTrace = {'smoothing' : 0.8}
smoothTrace['line'].update(updateSmoothTrace)
data = Data([unSmoothTrace, smoothTrace])
fig = Figure(data = data)
pyo.iplot(fig)
updateSmoothTrace = {'smoothing' : 0.3}
smoothTrace['line'].update(updateSmoothTrace)
data = Data([unSmoothTrace, smoothTrace])
fig = Figure(data = data)
pyo.iplot(fig)
from pandas_datareader import data
from scipy import signal
appleVals = data.get_data_yahoo('AAPL','1/1/2012','1/1/2013')
appleVals.head()
unSmoothApple = {'type' : 'scatter',
'x' : appleVals.index,
'y' : appleVals['Close']+50,
'mode' : 'lines',
'line' : {'smoothing' : 0,
'shape' : 'spline'},
'name' : 'Apple (UnSmooth)'}
smoothApple = {'type' : 'scatter',
'x' : appleVals.index,
'y' : appleVals['Close'],
'mode' : 'lines',
'line' : {'smoothing' : 1.3,
'shape' : 'spline'},
'name' : 'Apple (Smooth)'}
layout = {'title' : 'Stock closing prices for Apple in 2012',
'xaxis' : {'title' : 'Date'},
'yaxis' : {'title' : 'Closing Price ($)'}}
data = Data([unSmoothApple, smoothApple])
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
smoothApple.update({'y' : signal.savgol_filter(appleVals['Close'], 51, 3),})
unSmoothApple.update({'y' : appleVals['Close']})
data = Data([ smoothApple, unSmoothApple ])
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
Step-wise line shapes are only useful for displaying data of a specific nature. These plots display the data as a series of horizontal and vertical steps, rather than a smooth curve (as with smoothing), or a series of diagonal lines (as is normally the case).
For this reason, stepped lines should only be used to display data where the points change at a specific place. For example, the temperature change over a day should not be displayed using a step-wise line as the temperature can change fractionally every minute. On the other hand, something like the Bank of England base rate could be displayed using a step-wise line. Each month the Bank's board meets to decide what the base rate will be for that month. In the time between these meetings, the base rate doesn't change.
Let's use the Bank of England base rate to see what different kinds of step-wise line charts we can create.
The possible step-wise line options and their effects are:
"hv" - marker point is at the start of the horizontal section of the step
"vh" - marker point is at the start of the vertical section of the step
"hvh" - marker point is at the middle of the horizontal section of the step
"vhv" - marker point is at the middle of the vertical section of the step
# Load the Bank of England base-rate series from the course CSV.
baserate = pd.read_csv("http://www.richard-muir.com/data/public/csv/BoEBaseRate.csv")
# Keep only the first row for each distinct 'VALUE' (the first time each rate appears).
# NOTE(review): this also drops any later return to a rate that has been seen
# before, so genuine rate changes may be missing from the step chart - confirm
# this is intended (comparing each row with the previous row would keep them).
baserate.drop_duplicates(subset="VALUE", inplace=True)
# Work with the last ten remaining rows only.
baserate = baserate.tail(10)
baserate
baseRateTrace = {'type' : 'scatter',
'x' : baserate['DATE'],
'y' : baserate['VALUE'],
'mode' : 'lines+markers',
'line' : {'shape' : 'hv'},
'name' : 'BoE Base Rate'}
layout = {'title' : 'Bank of England Base Rate, 2001 - 2009',
'xaxis' : {'title' : 'Date'},
'yaxis' : {'title' : 'Base Rate (%)'}}
data = Data([baseRateTrace])
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
def updateLine(val):
    """Redraw the base-rate chart with a different line shape.

    Updates the module-level ``baseRateTrace`` in place, then rebuilds the
    figure with the module-level ``layout`` and displays it.

    Args:
        val: Value for the trace's ``line.shape`` option, for example
            ``'linear'``, ``'hv'``, ``'vh'``, ``'hvh'`` or ``'vhv'``.
    """
    baseRateTrace['line'].update({'shape' : val})
    data = Data([baseRateTrace])
    fig = Figure(data = data, layout = layout)
    pyo.iplot(fig)
updateLine('linear')
updateLine('vh')
updateLine('hvh') #the line descends (or ascends) from the marker point until the y-value of the next marker point before moving horizontally to meet that point
updateLine('vhv') #point is in the middle of the horizontal portion of the step.
The 'fill' option is contained within the trace object. The possible values are:
"none" - No fill
"tozeroy" - Fills to y = 0 (vertical fill)
"tozerox" - Fills to x = 0 (horizontal fill)
"tonexty" - Fills between traces (vertically, to the trace before or to 0 if there is no previous trace)
"tonextx" - Fills between traces (horizontally, to the trace before or to 0 if there is no previous trace)
"toself" - Connects the endpoints of a trace into a closed shape (useful for Kernel Density Plots)
"tonext" - Fills the space between two plots if one completely encloses the other (useful for Kernel Density Plots again)
emissions = pd.read_csv("http://richard-muir.com/data/public/csv/TotalCo2EmissionsByCountry.csv", index_col=0)
emissions.head()
columnNames = emissions.columns.tolist()
columnNames
UKEmissions = {'type' : 'scatter',
'x' : emissions['Year'],
'y' : emissions['United Kingdom | GBR'],
'mode' : 'lines',
'name' : 'UK Co2 Emissions'}
layout = {'title' : "Co2 emissions in kilotons for the UK, 1960-2015",
'xaxis' : {'title' : 'Year'},
'yaxis' : {'title' : 'Co2 Emissions (kt)'}}
data = Data([UKEmissions])
fig = Figure(data=data, layout=layout)
pyo.iplot(fig)
UKEmissions.update({'fill' : 'tozeroy'})
data = Data([UKEmissions])
fig = Figure(data=data, layout=layout)
pyo.iplot(fig)
UKEmissions.update({'fillcolor' : 'rgba(89, 100, 212, 0.46)'})
data = Data([UKEmissions])
fig = Figure(data=data, layout=layout)
pyo.iplot(fig)
UKEmissions.update({'fill' : 'tozerox'})
data = Data([UKEmissions])
fig = Figure(data=data, layout=layout)
pyo.iplot(fig)
# x and y value switched
UKEmissions.update({'x' : emissions['United Kingdom | GBR'],
'y' : emissions['Year']})
data = Data([UKEmissions])
layout = {'title' : "Co2 emissions in kilotons for the UK, 1960-2015",
'xaxis' : {'title' : 'Co2 Emissions (kt)'},
'yaxis' : {'title' : 'Year'}}
fig = Figure(data=data, layout=layout)
pyo.iplot(fig)
Stacked area graphs are great at displaying how the share between different categories and the total of all categories changes over time.
When making a stacked area chart, the values for each additional trace are cumulative. That is, if Country A has 100Kt emissions, and Country B has 50Kt of emissions, the line for Country B must be drawn at 150Kt of emissions.
ColumnSelection = ['United Arab Emirates | ARE','United Kingdom | GBR',
'United States | USA','China | CHN', 'India | IND','Year']
stackedAreaData = emissions.loc[:,(ColumnSelection)]
stackedAreaData.head(5)
UnitedArabEmirates = {'type' : 'scatter',
'x' : stackedAreaData['Year'],
'y' : stackedAreaData['United Arab Emirates | ARE'],
'mode' : 'lines',
'fill' : 'tonexty',
'name' : 'UAE Co2 Emissions'
}
UnitedKingdom = { 'type' : 'scatter',
'x' : stackedAreaData['Year'],
'y' : stackedAreaData['United Arab Emirates | ARE'] + stackedAreaData['United Kingdom | GBR'],
'mode' : 'lines',
'fill' : 'tonexty',
'name' : 'UK Co2 Emissions'
}
UnitedStates = { 'type' : 'scatter',
'x' : stackedAreaData['Year'],
'y' : stackedAreaData['United Arab Emirates | ARE'] + stackedAreaData['United Kingdom | GBR']
+ stackedAreaData['United States | USA'],
'mode' : 'lines',
'fill' : 'tonexty',
'name' : 'USA Co2 Emissions'
}
China = { 'type' : 'scatter',
'x' : stackedAreaData['Year'],
'y' : stackedAreaData['United Arab Emirates | ARE'] + stackedAreaData['United Kingdom | GBR']
+ stackedAreaData['United States | USA'] + stackedAreaData['China | CHN'],
'mode' : 'lines',
'fill' : 'tonexty',
'name' : 'China Co2 Emissions'
}
India = { 'type' : 'scatter',
'x' : stackedAreaData['Year'],
'y' : stackedAreaData['United Arab Emirates | ARE'] + stackedAreaData['United Kingdom | GBR']
+ stackedAreaData['United States | USA'] + stackedAreaData['China | CHN']
+ stackedAreaData['India | IND'],
'mode' : 'lines',
'fill' : 'tonexty',
'name' : 'India Co2 Emissions'
}
layout = {'title' : "Co2 emissions in kilotons, 1960-2011",
'xaxis' : {'title' : 'Year'},
'yaxis' : {'title' : 'Co2 Emissions (kt)'}}
data = Data([UnitedArabEmirates, UnitedKingdom, UnitedStates, China, India])
fig = Figure(data=data, layout=layout)
pyo.iplot(fig)
Stacked area plot using the 'tonexty' option for the 'fill' option. This allowed us to compare the change in total emissions, as well as the change in each individual country's emissions over a period of time.
In this lesson we're going to create a stacked area plot which shows the percentage of total emissions that each country produced. The code for the chart will be almost identical to the previous lesson; the novelty in this lesson will be learning how to do a little data manipulation to get the data into percentages of total emissions, rather than just the raw figures
sumColumnSelection = ['United Arab Emirates | ARE','United Kingdom | GBR',
'United States | USA','China | CHN', 'India | IND',]
stackedAreaData = emissions.loc[:,(['Year'] + sumColumnSelection)]
stackedAreaData.head()
stackedAreaData['Total'] = stackedAreaData[sumColumnSelection].sum(axis = 1)
stackedAreaData.head()
# Add one 'pc_<country>' column per country holding that country's share of
# the row total computed in the 'Total' column.
for country in sumColumnSelection:
    stackedAreaData["pc_"+str(country)] = stackedAreaData[country] / stackedAreaData['Total']
stackedAreaData.head()
sumColumnSelectionPC = ['pc_United Arab Emirates | ARE','pc_United Kingdom | GBR',
'pc_United States | USA','pc_China | CHN', 'pc_India | IND',]
PCAreaData = stackedAreaData[sumColumnSelectionPC].cumsum(axis=1)
PCAreaData.head()
PCAreaData['Year'] = stackedAreaData['Year']
PCAreaData.head()
# One filled line trace per cumulative-share column; 'Year' supplies the x values.
traces = []
for col in PCAreaData.columns.tolist():
    if col != 'Year':
        traces.append({'type' : 'scatter',
                       'x' : PCAreaData['Year'],
                       'y' : PCAreaData[col],
                       # Drop the leading 'pc_' and the trailing ' | XXX' country code.
                       'name' : col[3:-6],
                       'mode' : 'lines',
                       'fill' : 'tonexty'})
traces  # notebook cell output: inspect the generated traces
data = Data(traces)
layout = {'title' : "Proportion of Co2 Emissions, 1960-2011",
          'xaxis' : {'title' : 'Year'},
          'yaxis' : {'title' : 'Proprtion of Co2 Emissions'}}
fig = Figure(data = data, layout = layout)
pyo.iplot(fig)
def createStackedPropArea(df, time, cols, title, yaxisTitle):
    """
    A function which manipulates the data into the correct format to produce
    a stacked proportional area plot with Plotly.

    Step 1 of the walkthrough: select the needed columns and fill the gaps.

    Takes five arguments:
    df - a pandas DataFrame
    time - the time element of the data, must be a column in the DataFrame as a string
    cols - the names of the columns in the DataFrame which you want to include in the area plot, as a list
    title - the title of the chart (not used yet at this step)
    yaxisTitle - the yaxis title of the chart (not used yet at this step)

    Returns a new DataFrame holding the time column followed by ``cols``,
    with missing values replaced by 0. ``df`` itself is left untouched.
    """
    # fillna() without inplace returns a fresh frame, so nothing is mutated in place.
    stackedAreaDF = df.loc[:, [time] + cols].fillna(0)
    return stackedAreaDF
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR',
'United States | USA','China | CHN', 'India | IND', 'Andorra | AND'],
"Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
test.head()
def createStackedPropArea(df, time, cols, title, yaxisTitle):
    """
    Step 2 of the stacked proportional area walkthrough: add the row totals.

    df - a pandas DataFrame
    time - name of the time column in the DataFrame
    cols - list of the column names to include in the area plot
    title - the title of the chart (not used yet at this step)
    yaxisTitle - the yaxis title of the chart (not used yet at this step)

    Returns a new DataFrame with the time column, ``cols`` (missing values
    replaced by 0) and a 'Total' column holding the row-wise sum of ``cols``.
    """
    # fillna() without inplace returns a fresh frame, so df is never modified.
    stackedAreaDF = df.loc[:, [time] + cols].fillna(0)
    stackedAreaDF['Total'] = stackedAreaDF[cols].sum(axis=1)
    return stackedAreaDF
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR',
'United States | USA','China | CHN', 'India | IND',],
"Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
test.head()
def createStackedPropArea(df, time, cols, title, yaxisTitle):
    """
    Step 3 of the stacked proportional area walkthrough: add the share columns.

    df - a pandas DataFrame
    time - name of the time column in the DataFrame
    cols - list of the column names to include in the area plot
    title - the title of the chart (not used yet at this step)
    yaxisTitle - the yaxis title of the chart (not used yet at this step)

    Returns a new DataFrame with the time column, ``cols`` (missing values
    replaced by 0), a 'Total' column and one 'pc_<col>' column per entry of
    ``cols`` holding that column's share of the row total.
    """
    # fillna() without inplace returns a fresh frame, so df is never modified.
    stackedAreaDF = df.loc[:, [time] + cols].fillna(0)
    stackedAreaDF['Total'] = stackedAreaDF[cols].sum(axis=1)
    # A zero total would turn every share in that row into NaN (0 / 0);
    # mask it out and report such shares as 0 instead.
    nonzero_total = stackedAreaDF['Total'].where(stackedAreaDF['Total'] != 0)
    for col in cols:
        stackedAreaDF["pc_" + str(col)] = (stackedAreaDF[col] / nonzero_total).fillna(0)
    return stackedAreaDF
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR',
'United States | USA','China | CHN', 'India | IND',],
"Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
test.head()
def createStackedPropArea(df, time, cols, title, yaxisTitle):
    """
    Step 4 of the stacked proportional area walkthrough: cumulative shares.

    df - a pandas DataFrame
    time - name of the time column in the DataFrame
    cols - list of the column names to include in the area plot
    title - the title of the chart (not used yet at this step)
    yaxisTitle - the yaxis title of the chart (not used yet at this step)

    Returns a new DataFrame with one 'pc_<col>' column per entry of ``cols``
    holding the running (left-to-right) sum of the shares, followed by the
    time column.
    """
    # fillna() without inplace returns a fresh frame, so df is never modified.
    stackedAreaDF = df.loc[:, [time] + cols].fillna(0)
    total = stackedAreaDF[cols].sum(axis=1)
    # A zero total would turn every share in that row into NaN (0 / 0);
    # mask it out and report such shares as 0 instead.
    shares = stackedAreaDF[cols].div(total.where(total != 0), axis=0).fillna(0)
    # Stacking means each trace sits on top of the previous ones, hence the
    # cumulative sum across the columns.
    stackedPCAreaDF = shares.add_prefix('pc_').cumsum(axis=1)
    stackedPCAreaDF[time] = stackedAreaDF[time]
    return stackedPCAreaDF
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR',
'United States | USA','China | CHN', 'India | IND',],
"Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
test.head()
def createStackedPropArea(df, time, cols, title, yaxisTitle):
    """
    Step 5 of the stacked proportional area walkthrough: build the traces.

    df - a pandas DataFrame
    time - name of the time column in the DataFrame
    cols - list of the column names to include in the area plot
    title - the title of the chart (not used yet at this step)
    yaxisTitle - the yaxis title of the chart (not used yet at this step)

    Returns a list with one Plotly scatter-trace dictionary per entry of
    ``cols``. Each trace's y values are the cumulative shares up to and
    including that column, and each trace fills down to the previous one.
    """
    # fillna() without inplace returns a fresh frame, so df is never modified.
    stackedAreaDF = df.loc[:, [time] + cols].fillna(0)
    total = stackedAreaDF[cols].sum(axis=1)
    # A zero total would turn every share in that row into NaN (0 / 0);
    # mask it out and report such shares as 0 instead.
    shares = stackedAreaDF[cols].div(total.where(total != 0), axis=0).fillna(0)
    cumulative = shares.cumsum(axis=1)
    return [{'type' : 'scatter',
             'x' : stackedAreaDF[time],
             'y' : cumulative[col],
             # 'Country | ISO' -> 'Country'; a name without ' | ' is kept whole.
             'name' : col.rsplit(' | ', 1)[0],
             'mode' : 'lines',
             'fill' : 'tonexty'}
            for col in cols]
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR',
'United States | USA','China | CHN', 'India | IND',],
"Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
test
def createStackedPropArea(df, time, cols, title, yaxisTitle):
    """
    A function which manipulates the data into the correct format to produce
    a stacked proportional area plot with Plotly, then displays the plot.

    Takes five arguments:
    df - a pandas DataFrame
    time - the time element of the data, must be a column in the DataFrame
    cols - the names of the columns in the DataFrame which you want to include in the area plot
    title - the title of the chart
    yaxisTitle - the yaxis title of the chart (the xaxis title comes from the time variable)

    Nothing is returned; the figure is shown with ``pyo.iplot``. ``df`` is
    not modified.
    """
    # Drop repeated names and the time column itself, keeping the first
    # occurrence order. A repeated label would make the column selections
    # below return several columns instead of one.
    cols = [col for col in dict.fromkeys(cols) if col != time]

    # fillna() without inplace returns a fresh frame, so df is never modified.
    stackedAreaDF = df.loc[:, [time] + cols].fillna(0)
    total = stackedAreaDF[cols].sum(axis=1)
    # A zero total would turn every share in that row into NaN (0 / 0);
    # mask it out and report such shares as 0 instead.
    shares = stackedAreaDF[cols].div(total.where(total != 0), axis=0).fillna(0)
    # Stacking means each trace sits on top of the previous ones, hence the
    # cumulative sum across the columns.
    cumulative = shares.cumsum(axis=1)

    traces = [{'type' : 'scatter',
               'x' : stackedAreaDF[time],
               'y' : cumulative[col],
               # 'Country | ISO' -> 'Country'; a name without ' | ' is kept whole.
               'name' : col.rsplit(' | ', 1)[0],
               'mode' : 'lines',
               'fill' : 'tonexty'}
              for col in cols]

    data = Data(traces)
    layout = {'title' : title,
              'xaxis' : {'title' : time},
              'yaxis' : {'title' : yaxisTitle}}
    fig = Figure(data = data, layout = layout)
    pyo.iplot(fig)
test = createStackedPropArea(emissions, 'Year', ['United Arab Emirates | ARE','United Kingdom | GBR',
'United States | USA','China | CHN', 'India | IND',],
"Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
createStackedPropArea(emissions, 'Year', ['China | CHN',
'United States | USA',
'India | IND',
'United Arab Emirates | ARE',
'United Kingdom | GBR',
],
"Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
import random

# Pick up to ten distinct country columns at random. Sampling without
# replacement from the columns other than 'Year' avoids both duplicate picks
# and picking the time column itself; either would put a repeated column
# label into the selection made inside createStackedPropArea.
country_columns = [col for col in emissions.columns.tolist() if col != 'Year']
countries = random.sample(country_columns, min(10, len(country_columns)))
countries  # notebook cell output: show which countries were drawn
createStackedPropArea(emissions, 'Year', countries, "Proportion of Co2 Emissions, 1960-2015", 'Proprtion of Co2 Emissions')
stocks = py.get_figure("https://plot.ly/~rmuir/162/stock-closing-prices-for-apple-in-2012/")
stocks = py.get_figure('rmuir', 162)
stocks
pyo.iplot(stocks)
Each of 'xaxis' and 'yaxis' within the layout object has its own 'range' attribute which we can change. We set the range for each axis as a list of two numbers (float or integer). These numbers correspond to the minimum and maximum values respectively:
layout = {'xaxis' : {'range' : [
stocks['layout']['yaxis'].update({'range' : [0, 1000]})
pyo.iplot(stocks)
maximum = max(stocks['data'][0]['y'])
maximum# Passing max + 5% value
stocks['layout']['yaxis'].update({'range' : [0, maximum * 1.05]})
pyo.iplot(stocks)
The 'tickformat' option is contained within each of the x- and y-axis objects within the layout:
layout = {'xaxis' : {'tickformat' :
To change the tick format, we can pass different format string as the value for this option.
Here are some common string format values we can pass: Percentage with 2 decimal places: ".2%"; Percentage with 0 decimal places: ".0%"; Currency with 2 decimal places: "$.2f"; Currency with 0 decimal places: "$.0f"; Currency with 0 decimal places and thousand separator: "$,"
stocks['layout']['yaxis'].update({'tickformat' : '.2%'})
pyo.iplot(stocks)
stocks['layout']['yaxis'].update({'tickformat' : '$.2f'})
pyo.iplot(stocks)
stocks['layout']['yaxis'].update({'range' : [0, 1500], 'tickformat' : '$,'})
pyo.iplot(stocks)
# Set the y-axis range, title and tick format in one update.
# The range runs from 0 to 5% above the largest y value of the first trace.
stocks['layout'].update({'yaxis' : {'range' : [0, max(stocks['data'][0]['y']) * 1.05],
                                    'title' : 'Closing Price',
                                    # "$.0f": currency prefix, fixed-point, zero decimals.
                                    # The trailing "f" is required; without a type
                                    # character the ".0" precision is not applied as
                                    # "zero decimal places".
                                    'tickformat' : "$.0f"}})
pyo.iplot(stocks)
Plotly has a builtin datetime formatter.
Datetime object is made of two parts; a date and a time. Plotly does not recognise a time without a date. A date object: 31/12/95 - 31st December 1995 A time object: 12:35:59.99 - Almost 12:36pm (This is not valid on its own) A datetime object: 31/12/95 12:35:59.99 - Almost 12:36pm on 31st December 1995
Common date formats:
UK date: "%d/%m/%y" UK date with 4 digit year: "%d/%m/%Y" American date with hyphens: "%m-%d-%Y" Abbreviated weekday and month names: "%a %d %b %Y" Unabbreviated weekday and month names: "%A %d %B %Y"
Common time formats:
Datetime objects have a default time of 00:00:00.00. 24 hour clock with microseconds: "%H:%M:%S.%f"; 12 hour clock: "%I:%M:%S %p". If you have date and time data that you need to display, you can combine these formats: American date with hyphens and 24 hour clock: "%m-%d-%Y %H:%M:%S"
def updateDT(dt):
    """Set the x-axis tick format of the ``stocks`` figure to ``dt`` and redraw it.

    ``dt`` is a date/time format string such as "%d/%m/%y". The function works
    on the module-level ``stocks`` figure and returns nothing.
    """
    # The 'xaxis' entry of the layout is passed as a whole dict here.
    stocks['layout'].update({'xaxis' : {'tickformat' : dt}})
    pyo.iplot(stocks)
updateDT("%d/%m/%y")
updateDT("%m-%d-%Y")
updateDT("%a %d %b %Y")
updateDT("%b %Y")
A pie chart is a circular chart which is used to display categorical data where the different categories comprise parts of a whole. The area of the circle is divided into segments that each represent a proportion of a whole.
A pie chart should be used when there are very few categories; any more than 6 and it will become difficult to read. They should also only be used when there is a large distinction between one of the categories.
ethnicity = pd.read_csv("http://www.richard-muir.com/data/public/csv/UKStudentsEthnicity.csv",
index_col = 0, header=None, names=['N'])
ethnicity
To make a pie chart with Plotly, we only need to pass three parameters to our trace; 'labels', 'values' and 'type':
pieTrace = {'type' : 'pie',
'labels' : ethnicity.index,
'values' : ethnicity['N']}
data = [pieTrace]
layout = {'title' : 'Ethnicity of students in the UK'}
fig = {'data' : data, 'layout' : layout}
pyo.iplot(fig)
Sorting largest to smallest: the 'sort' parameter controls whether the pie segments are sorted by size.
fig['data'][0].update({'sort' : False})
pyo.iplot(fig)
pieTrace = {'type' : 'pie',
'labels' : ethnicity.index,
'values' : ethnicity['N'],
'sort' : True}
data = [pieTrace]
layout = {'title' : 'Ethnicity of students in the UK'}
fig = {'data' : data, 'layout' : layout}
pyo.iplot(fig)
'direction': this parameter defines the direction in which the segments are laid out: 'counterclockwise' or 'clockwise'.
fig['data'][0].update({'direction' : 'clockwise',
'sort' : True})
pyo.iplot(fig)
ethPie = py.get_figure("rmuir", 263)
pyo.iplot(ethPie)
ethPie['data'][0].update({'marker' : {'colors' : ["rgb(12,192,170)",
"rgb(190,252,250)",
"rgb(77,194,84)",
"rgb(211,238,128)",
"rgb(97,167,193)"]}})
pyo.iplot(ethPie)
ethPie['data'][0]['marker'].update({'line' : {'color' : '#333',
'width' : [1,1,3,1,1]}})
pyo.iplot(ethPie)
ethPie['data'][0].update({'pull' : 0.1})
pyo.iplot(ethPie)
ethPie['data'][0].update({'pull' : [0, 0, 0.2, 0, 0]})
pyo.iplot(ethPie)
level = pd.read_csv("http://richard-muir.com/data/public/csv/StudentsByLevelAndYear.csv", index_col = 0)
level
# Keep only the 2015/16 column and order the rows from largest to smallest.
# sort_values returns a new frame, which avoids sorting a column slice in place
# (pandas flags that pattern with a SettingWithCopyWarning).
level = level[['2015/16']].sort_values(by='2015/16', ascending=False)
level
fig = {'data' : [{'type' : 'pie',
'name' : "Students by level of study",
'labels' : level.index,
'values' : level['2015/16'],
'direction' : 'clockwise',
'marker' : {'colors' : ["rgb(183,101,184)", "rgb(236,77,216)", "rgb(176,164,216)", "rgb(255,168,255)"]}}],
'layout' : {'title' : 'Students by level of study in 2015-16'}}
pyo.iplot(fig)
Pie charts also have a parameter called 'textinfo'. This determines which trace information appears written on the pie chart. 'textinfo' can take any of the following values, joined with a '+':
'label' - displays the label on the segment
'text' - displays the text on the segment (this can be set separately to the label)
'value' - displays the value passed into the trace
'percent' - displays the computed percentage
We can see that the default is to only show 'percent'.
fig['data'][0].update({'text' : ['Undergrad FT',' Postgrad FT','Undergrad PT','Postgrad PT'],
'textinfo' : 'label+text+value+percent'})
pyo.iplot(fig)
# removing label
fig['data'][0].update({'textinfo' : 'text+value+percent',
'showlegend' : False})
pyo.iplot(fig)
# Information can be shown using hover info. Here we are displaying the label.
fig['data'][0].update({'hoverinfo' : 'label'})
pyo.iplot(fig)
level = py.get_figure("rmuir", 269)
pyo.iplot(level)
level['data'][0].update({'textposition' : 'none'}) # Removes the text
pyo.iplot(level)
level['data'][0].update({'textposition' : 'outside'})
pyo.iplot(level)
level['data'][0].update({'textposition' : 'inside'})
pyo.iplot(level)
# Styling Text
pieTrace = {'type' : 'pie',
'labels' : ethnicity.index,
'values' : ethnicity['N'],
'marker' : {'colors' : ["rgb(12,192,170)",
"rgb(190,252,250)",
"rgb(77,194,84)",
"rgb(211,238,128)",
"rgb(97,167,193)"]},
'pull' : [0, 0, 0.2, 0, 0]
}
data = [pieTrace]
layout = {'title' : 'Ethnicity of students in the UK'}
fig = {'data' : data, 'layout' : layout}
pyo.iplot(fig)
Text can be set separately using 'outsidetextfont' and 'insidetextfont'
fig['data'][0].update({'outsidetextfont' : {'size' : 16},
'insidetextfont' : {'color' : 'black',
'size' : 13}})
pyo.iplot(fig)
# plotly.tools.make_subplots is deprecated; plotly.subplots is the supported
# location and is what the later cells in this file import from.
from plotly.subplots import make_subplots
# print_grid=True prints the layout of the subplot grid when the figure is built.
sub = make_subplots(rows = 2, cols = 2, print_grid = True)
print(sub)
This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (2,1) x3,y3 ] [ (2,2) x4,y4 ]
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(rows=3, cols=1)
fig.append_trace(go.Scatter(
x=[3, 4, 5],
y=[1000, 1100, 1200],
), row=1, col=1)
fig.append_trace(go.Scatter(
x=[2, 3, 4],
y=[100, 110, 120],
), row=2, col=1)
fig.append_trace(go.Scatter(
x=[0, 1, 2],
y=[10, 11, 12]
), row=3, col=1)
fig.update_layout(height=600, width=600, title_text="Stacked subplots")
fig.show()
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(rows=3, cols=1)
fig.append_trace(go.Scatter(
x=[3, 4, 5],
y=[1000, 1100, 1200],
), row=1, col=1)
fig.append_trace(go.Scatter(
x=[2, 3, 4],
y=[100, 110, 120],
), row=2, col=1)
fig.append_trace(go.Scatter(
x=[0, 1, 2],
y=[10, 11, 12]
), row=3, col=1)
fig.update_layout(height=600, width=600, title_text="Stacked subplots")
fig.show()
from plotly.subplots import make_subplots
fig = make_subplots(rows=2, cols=2, start_cell="bottom-left")
fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]),
row=1, col=1)
fig.add_trace(go.Scatter(x=[20, 30, 40], y=[50, 60, 70]),
row=1, col=2)
fig.add_trace(go.Scatter(x=[300, 400, 500], y=[600, 700, 800]),
row=2, col=1)
fig.add_trace(go.Scatter(x=[4000, 5000, 6000], y=[7000, 8000, 9000]),
row=2, col=2)
fig.show()
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(
rows=2, cols=2,
subplot_titles=("Plot 1", "Plot 2", "Plot 3", "Plot 4"))
fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]),
row=1, col=1)
fig.add_trace(go.Scatter(x=[20, 30, 40], y=[50, 60, 70]),
row=1, col=2)
fig.add_trace(go.Scatter(x=[300, 400, 500], y=[600, 700, 800]),
row=2, col=1)
fig.add_trace(go.Scatter(x=[4000, 5000, 6000], y=[7000, 8000, 9000]),
row=2, col=2)
fig.update_layout(height=500, width=700,
title_text="Multiple Subplots with Titles")
fig.show()
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(rows=1, cols=2)
fig.add_trace(
go.Scatter(
x=[1, 2, 3],
y=[4, 5, 6],
mode="markers+text",
text=["Text A", "Text B", "Text C"],
textposition="bottom center"
),
row=1, col=1
)
fig.add_trace(
go.Scatter(
x=[20, 30, 40],
y=[50, 60, 70],
mode="markers+text",
text=["Text D", "Text E", "Text F"],
textposition="bottom center"
),
row=1, col=2
)
fig.update_layout(height=600, width=800, title_text="Annotations and subplots")
fig.show()
import plotly.graph_objects as go
from plotly.subplots import make_subplots
fig = make_subplots(rows=1, cols=2, column_widths=[0.7, 0.3])
fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]),
row=1, col=1)
fig.add_trace(go.Scatter(x=[20, 30, 40], y=[50, 60, 70]),
row=1, col=2)
fig.show()
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Initialize figure with subplots
fig = make_subplots(
rows=2, cols=2, subplot_titles=("Plot 1", "Plot 2", "Plot 3", "Plot 4")
)
# Add traces
fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]), row=1, col=1)
fig.add_trace(go.Scatter(x=[20, 30, 40], y=[50, 60, 70]), row=1, col=2)
fig.add_trace(go.Scatter(x=[300, 400, 500], y=[600, 700, 800]), row=2, col=1)
fig.add_trace(go.Scatter(x=[4000, 5000, 6000], y=[7000, 8000, 9000]), row=2, col=2)
# Update xaxis properties
fig.update_xaxes(title_text="xaxis 1 title", row=1, col=1)
fig.update_xaxes(title_text="xaxis 2 title", range=[10, 50], row=1, col=2)
fig.update_xaxes(title_text="xaxis 3 title", showgrid=False, row=2, col=1)
fig.update_xaxes(title_text="xaxis 4 title", type="log", row=2, col=2)
# Update yaxis properties
fig.update_yaxes(title_text="yaxis 1 title", row=1, col=1)
fig.update_yaxes(title_text="yaxis 2 title", range=[40, 80], row=1, col=2)
fig.update_yaxes(title_text="yaxis 3 title", showgrid=False, row=2, col=1)
fig.update_yaxes(title_text="yaxis 4 title", row=2, col=2)
# Update title and height
fig.update_layout(title_text="Customizing Subplot Axes", height=700)
fig.show()
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(
rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.02
)
fig.add_trace(go.Scatter(x=[0, 1, 2], y=[10, 11, 12]),
row=3, col=1)
fig.add_trace(go.Scatter(x=[2, 3, 4], y=[100, 110, 120]),
row=2, col=1)
fig.add_trace(go.Scatter(x=[3, 4, 5], y=[1000, 1100, 1200]),
row=1, col=1)
fig.update_layout(height=600, width=600,
title_text="Stacked Subplots with Shared X-Axes")
fig.show()
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(rows=2, cols=2, shared_yaxes=True)
fig.add_trace(go.Scatter(x=[1, 2, 3], y=[2, 3, 4]),
row=1, col=1)
fig.add_trace(go.Scatter(x=[20, 30, 40], y=[5, 5, 5]),
row=1, col=2)
fig.add_trace(go.Scatter(x=[2, 3, 4], y=[600, 700, 800]),
row=2, col=1)
fig.add_trace(go.Scatter(x=[4000, 5000, 6000], y=[7000, 8000, 9000]),
row=2, col=2)
fig.update_layout(height=600, width=600,
title_text="Multiple Subplots with Shared Y-Axes")
fig.show()
The colspan subplot option specifies the number of grid columns that the subplot starting in the given cell should occupy. If unspecified, colspan defaults to 1.
Here is an example that creates a 2 by 2 subplot grid containing 3 subplots. The subplot specs element for position (2, 1) has a colspan value of 2, causing it to span the full figure width. The subplot specs element for position (2, 2) is None because no subplot begins at this location in the grid.
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(
rows=2, cols=2,
specs=[[{}, {}],
[{"colspan": 2}, None]],
subplot_titles=("First Subplot","Second Subplot", "Third Subplot"))
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2]),
row=1, col=1)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2]),
row=1, col=2)
fig.add_trace(go.Scatter(x=[1, 2, 3], y=[2, 1, 2]),
row=2, col=1)
fig.update_layout(showlegend=False, title_text="Specs with Subplot Title")
fig.show()
rowspan and colspan subplot options to create a custom subplot layout with subplots of mixed sizes
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(
rows=5, cols=2,
specs=[[{}, {"rowspan": 2}],
[{}, None],
[{"rowspan": 2, "colspan": 2}, None],
[None, None],
[{}, {}]],
print_grid=True)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(1,1)"),
row=1, col=1)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(1,2)"), row=1, col=2)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(2,1)"), row=2, col=1)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(3,1)"), row=3, col=1)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(5,1)"), row=5, col=1)
fig.add_trace(go.Scatter(x=[1, 2], y=[1, 2], name="(5,2)"), row=5, col=2)
fig.update_layout(height=600, width=600, title_text="specs examples")
fig.show()
Subplots Types
By default, the make_subplots function assumes that the traces that will be added to all subplots are 2-dimensional cartesian traces (e.g. scatter, bar, histogram, violin, etc.). Traces with other subplot types (e.g. scatterpolar, scattergeo, parcoords, etc.) are supported by specifying the type subplot option in the specs argument to make_subplots. Here are the possible values for the type option:
trace type: A trace type name (e.g. "bar", "scattergeo", "carpet", "mesh", etc.) which will be used to determine the appropriate subplot type for that trace.
from plotly.subplots import make_subplots
import plotly.graph_objects as go
fig = make_subplots(
rows=2, cols=2,
specs=[[{"type": "xy"}, {"type": "polar"}],
[{"type": "domain"}, {"type": "scene"}]],
)
fig.add_trace(go.Bar(y=[2, 3, 1]),
row=1, col=1)
fig.add_trace(go.Barpolar(theta=[0, 45, 90], r=[2, 3, 1]),
row=1, col=2)
fig.add_trace(go.Pie(values=[2, 3, 1]),
row=2, col=1)
fig.add_trace(go.Scatter3d(x=[2, 3, 1], y=[0, 0, 0], z=[0.5, 1, 2], mode="lines"),
row=2, col=2)
fig.update_layout(height=700, showlegend=False)
fig.show()
import plotly.graph_objects as go
fig = make_subplots(
rows=2, cols=2,
specs=[[{"type": "bar"}, {"type": "barpolar"}],
[{"type": "pie"}, {"type": "scatter3d"}]],
)
fig.add_trace(go.Bar(y=[2, 3, 1]),
row=1, col=1)
fig.add_trace(go.Barpolar(theta=[0, 45, 90], r=[2, 3, 1]),
row=1, col=2)
fig.add_trace(go.Pie(values=[2, 3, 1]),
row=2, col=1)
fig.add_trace(go.Scatter3d(x=[2, 3, 1], y=[0, 0, 0], z=[0.5, 1, 2], mode="lines"),
row=2, col=2)
fig.update_layout(height=700, showlegend=False)
fig.show()
import plotly.graph_objects as go
trace1 = go.Scatter(
x=[1, 2, 3],
y=[2, 3, 4]
)
trace2 = go.Scatter(
x=[20, 30, 40],
y=[5, 5, 5],
xaxis="x2",
yaxis="y"
)
trace3 = go.Scatter(
x=[2, 3, 4],
y=[600, 700, 800],
xaxis="x",
yaxis="y3"
)
trace4 = go.Scatter(
x=[4000, 5000, 6000],
y=[7000, 8000, 9000],
xaxis="x4",
yaxis="y4"
)
data = [trace1, trace2, trace3, trace4]
layout = go.Layout(
xaxis=dict(
domain=[0, 0.45]
),
yaxis=dict(
domain=[0, 0.45]
),
xaxis2=dict(
domain=[0.55, 1]
),
xaxis4=dict(
domain=[0.55, 1],
anchor="y4"
),
yaxis3=dict(
domain=[0.55, 1]
),
yaxis4=dict(
domain=[0.55, 1],
anchor="x4"
)
)
fig = go.Figure(data=data, layout=layout)
fig.show()
import plotly.graph_objects as go
trace1 = go.Scatter(
x=[0, 1, 2],
y=[10, 11, 12]
)
trace2 = go.Scatter(
x=[2, 3, 4],
y=[100, 110, 120],
yaxis="y2"
)
trace3 = go.Scatter(
x=[3, 4, 5],
y=[1000, 1100, 1200],
yaxis="y3"
)
data = [trace1, trace2, trace3]
layout = go.Layout(
yaxis=dict(
domain=[0, 0.33]
),
legend=dict(
traceorder="reversed"
),
yaxis2=dict(
domain=[0.33, 0.66]
),
yaxis3=dict(
domain=[0.66, 1]
)
)
fig = go.Figure(data=data, layout=layout)
fig.show()
import plotly.graph_objects as go
from plotly.subplots import make_subplots
labels = ["US", "China", "European Union", "Russian Federation", "Brazil", "India",
"Rest of World"]
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=labels, values=[16, 15, 12, 6, 5, 4, 42], name="GHG Emissions"),
1, 1)
fig.add_trace(go.Pie(labels=labels, values=[27, 11, 25, 8, 1, 3, 25], name="CO2 Emissions"),
1, 2)
# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")
fig.update_layout(
title_text="Global Emissions 1990-2011",
# Add annotations in the center of the donut pies.
annotations=[dict(text='GHG', x=0.18, y=0.5, font_size=20, showarrow=False),
dict(text='CO2', x=0.82, y=0.5, font_size=20, showarrow=False)])
fig.show()
Donut charts can be used when the information that we need to convey is very simple and the differences between the categories are stark. They can be used when seeing the general trend in the data is more important than knowing the exact figures.
Donut charts are most useful when you need to display contextual information about your data, and that contextual information must be inextricably linked to that bit of data - by using a donut chart, you can put the information in the hole.
outcomes = pd.read_csv("http://richard-muir.com/data/public/csv/StudentOutcomes201415BySubjectArea.csv", index_col = 0)
outcomes
fig = {'data' : [{'type' : 'pie',
'labels' : outcomes.columns.tolist(),
'values' : outcomes.loc['Medicine & dentistry'],
'name' : 'Medicine & dentistry',
'direction' : 'clockwise',
'hole' : 0.5}],
'layout' : {'title' : 'Outcomes for medicine and dentistry students'}}
pyo.iplot(fig)
info = "Medicine & Dentistry students are more likely to be employed than students from any other subject area"
fig['layout'].update({'annotations' : [{'text' : info,
'xref' : 'paper',
'yref' : 'paper',
'x' : 0.5,
'y' : 0.5,
'showarrow' : False}]})
pyo.iplot(fig)
# Adding Line Breaks
info = "<b>Medicine &<br>Dentistry students<br>are more likely to<br>be employed than<br>students from any<br>other subject<br>area</b>"
fig['layout'].update({'annotations' : [{'text' : info,
'xref' : 'paper',
'yref' : 'paper',
'x' : 0.5,
'y' : 0.5,
'showarrow' : False,
'font' : {'size' : 16}}]})
pyo.iplot(fig)
fig['data'][0].update({'hole' : 0.55})
pyo.iplot(fig)
Sunburst plots visualize hierarchical data spanning outwards radially from root to leaves. The sunburst sector hierarchy is determined by the entries in labels (names in px.sunburst) and in parents. The root starts from the center and children are added to the outer rings.
Main arguments:
# Using plotly express
import plotly.express as px
data = dict(
character=["Eve", "Cain", "Seth", "Enos", "Noam", "Abel", "Awan", "Enoch", "Azura"],
parent=["", "Eve", "Eve", "Seth", "Seth", "Eve", "Eve", "Awan", "Eve" ],
value=[10, 14, 12, 10, 2, 6, 6, 4, 4])
fig =px.sunburst(
data,
names='character',
parents='parent',
values='value',
)
fig.show()
# Using go.Sunburst
import plotly.graph_objects as go
fig =go.Figure(go.Sunburst(
labels=["Eve", "Cain", "Seth", "Enos", "Noam", "Abel", "Awan", "Enoch", "Azura"],
parents=["", "Eve", "Eve", "Seth", "Seth", "Eve", "Eve", "Awan", "Eve" ],
values=[10, 14, 12, 10, 2, 6, 6, 4, 4],
))
# Update layout for tight margin
# See https://plot.ly/python/creating-and-updating-figures/
fig.update_layout(margin = dict(t=0, l=0, r=0, b=0))
fig.show()
fig =go.Figure(go.Sunburst(
ids=[
"North America", "Europe", "Australia", "North America - Football", "Soccer",
"North America - Rugby", "Europe - Football", "Rugby",
"Europe - American Football","Australia - Football", "Association",
"Australian Rules", "Autstralia - American Football", "Australia - Rugby",
"Rugby League", "Rugby Union"
],
labels= [
"North<br>America", "Europe", "Australia", "Football", "Soccer", "Rugby",
"Football", "Rugby", "American<br>Football", "Football", "Association",
"Australian<br>Rules", "American<br>Football", "Rugby", "Rugby<br>League",
"Rugby<br>Union"
],
parents=[
"", "", "", "North America", "North America", "North America", "Europe",
"Europe", "Europe","Australia", "Australia - Football", "Australia - Football",
"Australia - Football", "Australia - Football", "Australia - Rugby",
"Australia - Rugby"
],
))
fig.update_layout(margin = dict(t=0, l=0, r=0, b=0))
fig.show()
import plotly.graph_objects as go
fig =go.Figure(go.Sunburst(
labels=[ "Eve", "Cain", "Seth", "Enos", "Noam", "Abel", "Awan", "Enoch", "Azura"],
parents=["", "Eve", "Eve", "Seth", "Seth", "Eve", "Eve", "Awan", "Eve" ],
values=[ 65, 14, 12, 10, 2, 6, 6, 4, 4],
branchvalues="total",
))
fig.update_layout(margin = dict(t=0, l=0, r=0, b=0))
fig.show()
import plotly.graph_objects as go
import pandas as pd
df1 = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/718417069ead87650b90472464c7565dc8c2cb1c/sunburst-coffee-flavors-complete.csv')
df2 = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/718417069ead87650b90472464c7565dc8c2cb1c/coffee-flavors.csv')
fig = go.Figure()
fig.add_trace(go.Sunburst(
ids=df1.ids,
labels=df1.labels,
parents=df1.parents,
domain=dict(column=0)
))
fig.add_trace(go.Sunburst(
ids=df2.ids,
labels=df2.labels,
parents=df2.parents,
domain=dict(column=1),
maxdepth=2
))
fig.update_layout(
grid= dict(columns=2, rows=1),
margin = dict(t=0, l=0, r=0, b=0)
)
fig.show()
The example below visualizes a breakdown of sales (corresponding to sector width) and call success rate (corresponding to sector color) by region, county and salesperson level. For example, when exploring the data you can see that although the East region is behaving poorly, the Tyler county is still above average -- however, its performance is reduced by the poor success rate of salesperson GT.
In the right subplot which has a maxdepth of two levels, click on a sector to see its breakdown to lower levels.
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/sales_success.csv')
print(df.head())
levels = ['salesperson', 'county', 'region'] # levels used for the hierarchical chart
color_columns = ['sales', 'calls']
value_column = 'calls'
def build_hierarchical_dataframe(df, levels, value_column, color_columns=None):
    """
    Build a hierarchy of levels for Sunburst or Treemap charts.

    Levels are given starting from the bottom to the top of the hierarchy,
    ie the last level corresponds to the root.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows; must contain every column named in ``levels``,
        ``value_column`` and ``color_columns``.
    levels : list of str
        Column names of the hierarchy, deepest level first.
    value_column : str
        Column that is summed to give each node's value.
    color_columns : sequence of two str, optional
        ``(numerator, denominator)`` columns; a node's colour is the ratio of
        their sums. When omitted, the ``color`` column is filled with NaN.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``parent``, ``value`` and ``color``: the nodes of each
        level in turn, followed by a final ``'total'`` root row whose parent
        is the empty string.
    """
    columns = ['id', 'parent', 'value', 'color']
    has_color = color_columns is not None
    frames = []
    for i, level in enumerate(levels):
        # Aggregate over this level together with all of its ancestors.
        # The pandas keyword is ``numeric_only``.
        dfg = df.groupby(levels[i:]).sum(numeric_only=True).reset_index()
        df_tree = pd.DataFrame({'id': dfg[level]})
        if i < len(levels) - 1:
            df_tree['parent'] = dfg[levels[i + 1]]
        else:
            # The top level hangs off the synthetic root row added below.
            df_tree['parent'] = 'total'
        df_tree['value'] = dfg[value_column]
        if has_color:
            df_tree['color'] = dfg[color_columns[0]] / dfg[color_columns[1]]
        else:
            df_tree['color'] = float('nan')
        frames.append(df_tree)
    if has_color:
        total_color = df[color_columns[0]].sum() / df[color_columns[1]].sum()
    else:
        total_color = float('nan')
    # Root row covering the whole data set.
    frames.append(pd.DataFrame({'id': ['total'],
                                'parent': [''],
                                'value': [df[value_column].sum()],
                                'color': [total_color]}))
    # DataFrame.append was removed in pandas 2.0; collect the pieces and
    # concatenate them once instead.
    return pd.concat(frames, ignore_index=True)[columns]
df_all_trees = build_hierarchical_dataframe(df, levels, value_column, color_columns)
average_score = df['sales'].sum() / df['calls'].sum()
fig = make_subplots(1, 2, specs=[[{"type": "domain"}, {"type": "domain"}]],)
fig.add_trace(go.Sunburst(
labels=df_all_trees['id'],
parents=df_all_trees['parent'],
values=df_all_trees['value'],
branchvalues='total',
marker=dict(
colors=df_all_trees['color'],
colorscale='RdBu',
cmid=average_score),
hovertemplate='<b>%{label} </b> <br> Sales: %{value}<br> Success rate: %{color:.2f}',
name=''
), 1, 1)
fig.add_trace(go.Sunburst(
labels=df_all_trees['id'],
parents=df_all_trees['parent'],
values=df_all_trees['value'],
branchvalues='total',
marker=dict(
colors=df_all_trees['color'],
colorscale='RdBu',
cmid=average_score),
hovertemplate='<b>%{label} </b> <br> Sales: %{value}<br> Success rate: %{color:.2f}',
maxdepth=2
), 1, 2)
fig.update_layout(margin=dict(t=10, b=10, r=10, l=10))
fig.show()
Bar Chart: A barchart is used to display categorical data, with the size of the bar representing the quantity (or sometimes proportion) in that particular category. Barcharts make it easy to compare different categories because we can easily assess which bars are longer than others. Barcharts can be horizontal or vertical.
meteorite = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoriteLandingsPerYear.csv", index_col = 0)
meteorite.head()
numberOfMeteorites = {'type' : 'bar',
'x' : meteorite.index,
'y' : meteorite['count']}
pyo.iplot([numberOfMeteorites])
# Adding layout
layout = {'title' : "Number of meteorites found per year",
'xaxis' : {'title' : 'Year'},
'yaxis' : {'title' : 'Number of meteorites'},
'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
'font' : {'color' : 'grey',
'size' : 10},
'xref' : 'paper',
'yref' : 'paper',
'x' : 0,
'y' : -0.2,
'showarrow' : False}]}
fig = {'data' : [numberOfMeteorites],
'layout' : layout}
pyo.iplot(fig)
numberOfMeteorites = {'type' : 'bar',
'x' : meteorite.index,
'y' : meteorite['count']}
layout = {'title' : "Number of meteorites found per year",
'xaxis' : {'title' : 'Year'},
'yaxis' : {'title' : 'Number of meteorites'},
'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
'font' : {'color' : 'grey',
'size' : 10},
'xref' : 'paper',
'yref' : 'paper',
'x' : 0,
'y' : -0.2,
'showarrow' : False}]}
fig = {'data' : [numberOfMeteorites],
'layout' : layout}
pyo.iplot(fig)
access the 'color' property within 'marker' sub-dictionary
fig['data'][0].update({'marker' : {'color' : 'lightblue'}})
pyo.iplot(fig)
We can also change different styling options for the bars. Adding a grey outline to each bar:
fig['data'][0]['marker'].update({'line' : {'color' : '#333',
'width' : 2}})
pyo.iplot(fig)
fig['data'][0].update({'opacity' : 0.5})
pyo.iplot(fig)
numberOfMeteorites = {'type' : 'bar',
'x' : meteorite.index,
'y' : meteorite['count'],
'marker' : {'color' : 'lightblue',
'line' : {'color' : '#333',
'width' : 2}},
'opacity' : 0.5,}
layout = {'title' : "Number of meteorites found per year",
'xaxis' : {'title' : 'Year'},
'yaxis' : {'title' : 'Number of meteorites'},
'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
'font' : {'color' : 'grey',
'size' : 10},
'xref' : 'paper',
'yref' : 'paper',
'x' : 0,
'y' : -0.2,
'showarrow' : False}]}
fig = {'data' : [numberOfMeteorites],
'layout' : layout}
pyo.iplot(fig)
# Start with one identical colour per bar (echoed for inspection) ...
colours = ['lightblue'] * len(meteorite.index)
colours
# ... then rebuild the list so that the 2003 bar stands out in purple.
colours = ['purple' if year == 2003 else 'lightblue' for year in meteorite.index]
colours
numberOfMeteorites = {'type' : 'bar',
'x' : meteorite.index,
'y' : meteorite['count'],
'marker' : {'color' : colours,
'line' : {'color' : '#333',
'width' : 2}},
'opacity' : 0.5,}
layout = {'title' : "Number of meteorites found per year",
'xaxis' : {'title' : 'Year'},
'yaxis' : {'title' : 'Number of meteorites'},
'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
'font' : {'color' : 'grey',
'size' : 10},
'xref' : 'paper',
'yref' : 'paper',
'x' : 0,
'y' : -0.2,
'showarrow' : False}]}
fig = {'data' : [numberOfMeteorites],
'layout' : layout}
pyo.iplot(fig)
# Finally adding annotation
fig['layout']['annotations'].append({'text' : 'Comet Bradfield 12,000km from Earth',
'x' : 2003,
'y' : 3323,
'showarrow' : True})
pyo.iplot(fig)
meteorite = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoriteLandingsPerYear.csv", index_col = 0)
meteorite.head()
colours = ['lightblue' if x != 2003 else 'purple' for x in meteorite.index ]
numberOfMeteorites = {'type' : 'bar',
'x' : meteorite.index,
'y' : meteorite['count'],
'marker' : {'color' : colours,
'line' : {'color' : '#333',
'width' : 2}},
'opacity' : 0.5,}
layout = {'title' : "Number of meteorites found per year",
'xaxis' : {'title' : 'Year'},
'yaxis' : {'title' : 'Number of meteorites'},
'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
'font' : {'color' : 'grey',
'size' : 10},
'xref' : 'paper',
'yref' : 'paper',
'x' : 0,
'y' : -0.2,
'showarrow' : False},
{'text' : 'Comet Bradfield 12,000km from Earth',
'x' : 2003,
'y' : 3323,
'showarrow' : True}]}
fig = {'data' : [numberOfMeteorites],
'layout' : layout}
pyo.iplot(fig)
Set the 'orientation' parameter in the trace to 'h' to tell Plotly that we want this trace to be for a horizontal barchart.
colours = ['lightblue' if x != 2003 else 'purple' for x in meteorite.index ]
numberOfMeteorites = {'type' : 'bar',
#NEW CODE GOES HERE
'orientation' : 'h',
'x' : meteorite.index,
'y' : meteorite['count'],
'marker' : {'color' : colours,
'line' : {'color' : '#333',
'width' : 2}},
'opacity' : 0.5,}
layout = {'title' : "Number of meteorites found per year",
'xaxis' : {'title' : 'Year'},
'yaxis' : {'title' : 'Number of meteorites'},
'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
'font' : {'color' : 'grey',
'size' : 10},
'xref' : 'paper',
'yref' : 'paper',
'x' : 0,
'y' : -0.2,
'showarrow' : False},
{'text' : 'Comet Bradfield 12,000km from Earth',
'x' : 2003,
'y' : 3323,
'showarrow' : True}]}
fig = {'data' : [numberOfMeteorites],
'layout' : layout}
pyo.iplot(fig)
In the case above we also need to swap the x and y values.
colours = ['lightblue' if x != 2003 else 'purple' for x in meteorite.index ]
numberOfMeteorites = {'type' : 'bar',
'orientation' : 'h',
#SWAP X- AND Y-COORDINATES HERE
'x' : meteorite['count'],
'y' : meteorite.index,
'marker' : {'color' : colours,
'line' : {'color' : '#333',
'width' : 2}},
'opacity' : 0.5,}
layout = {'title' : "Number of meteorites found per year",
'xaxis' : {'title' : 'Number of meteorites'},
'yaxis' : {'title' : 'Year'},
'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
'font' : {'color' : 'grey',
'size' : 10},
'xref' : 'paper',
'yref' : 'paper',
'x' : 0,
'y' : -0.2,
'showarrow' : False},
{'text' : 'Comet Bradfield 12,000km from Earth',
#SWAP X- AND Y-COORDINATES HERE
'x' : 3323,
'y' : 2003,
'showarrow' : True}]}
fig = {'data' : [numberOfMeteorites],
'layout' : layout}
pyo.iplot(fig)
meteorites = pd.read_csv("http://richard-muir.com/data/public/csv/MeteoritesByContinent.csv", index_col = 0)
meteorites.head()
continents = list(meteorites['continent'].unique())
continents
# One bar trace per continent: years on the x-axis, meteorite counts as bar heights.
traces = []
for c in continents:
    # Select this continent's rows once and reuse them for both axes.
    rows = meteorites[meteorites['continent'] == c]
    traces.append({'type' : 'bar',
                   'name' : c,
                   'x' : rows['year'],
                   'y' : rows['count']})
layout = {'title' : "Meteorites found by continent, 2000 - 2012",
'xaxis' : {'title' : 'Year'},
'yaxis' : {'title' : 'Number of meteorites'},
'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
'font' : {'color' : 'grey',
'size' : 10},
'xref' : 'paper',
'yref' : 'paper',
'x' : 0,
'y' : -0.2,
'showarrow' : False}]}
fig = {'data' : traces, 'layout' : layout}
pyo.iplot(fig)
# Another way of grouping
years = list(meteorites['year'].unique())
years
# One bar trace per year: continents on the x-axis, meteorite counts as bar heights.
traces = []
for y in years:
    # Select this year's rows once and reuse them for both axes.
    rows = meteorites[meteorites['year'] == y]
    traces.append({'type' : 'bar',
                   # Trace names are strings; the values in `years` may be
                   # NumPy scalars rather than plain ints, so convert explicitly.
                   'name' : str(y),
                   # CHANGE TO SPLIT BY YEAR
                   'x' : rows['continent'],
                   'y' : rows['count'],
                   'opacity' : 0.7})
layout = {'title' : "Meteorites found by continent, 2000 - 2012",
'xaxis' : {'title' : 'Continent'},
'yaxis' : {'title' : 'Number of meteorites'},
'annotations' : [{'text' : '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
'font' : {'color' : 'grey',
'size' : 10},
'xref' : 'paper',
'yref' : 'paper',
'x' : 0,
'y' : -0.2,
'showarrow' : False}]}
fig = {'data' : traces, 'layout' : layout}
pyo.iplot(fig)
# Enable inline chart rendering in an IPython notebook
# (run at the start of every IPython session).
pyo.offline.init_notebook_mode()

meteorites = pd.read_csv(
    "http://richard-muir.com/data/public/csv/MeteoritesByContinent.csv",
    index_col=0,
)
meteorites.head()
continents = meteorites['continent'].unique().tolist()
continents

# One semi-transparent bar trace per continent.
traces = []
for continent in continents:
    rows = meteorites[meteorites['continent'] == continent]
    traces.append(dict(type='bar',
                       name=continent,
                       x=rows['year'],
                       y=rows['count'],
                       opacity=0.7))

layout = dict(
    title="Meteorites found by continent, 2000 - 2012",
    xaxis=dict(title='Year'),
    yaxis=dict(title='Number of meteorites'),
    annotations=[
        # Source credit, anchored with xref/yref = 'paper'.
        dict(text='<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
             font=dict(color='grey', size=10),
             xref='paper',
             yref='paper',
             x=0,
             y=-0.2,
             showarrow=False),
    ],
)
fig = dict(data=traces, layout=layout)
pyo.iplot(fig)
# Changing the 'barmode'
The 'barmode' option is contained within the layout and can take one of two options: 'stack', which makes a stacked bar chart, or 'group', the default, which makes a grouped bar chart (as above). Let's change the 'barmode' to 'stack' for this chart:
# Switch the existing figure to stacked bars and redraw it.
fig['layout']['barmode'] = 'stack'
pyo.iplot(fig)
# Hand-picked continent order; traces are created in this order.
continents = [
    'Antarctica',
    'Asia',
    'Africa',
    'South America',
    'North America',
    'Australia',
    'Europe',
]

traces = []
for continent in continents:
    rows = meteorites[meteorites['continent'] == continent]
    traces.append(dict(type='bar',
                       name=continent,
                       x=rows['year'],
                       y=rows['count'],
                       opacity=0.7))

layout = dict(
    title="Meteorites found by continent, 2000 - 2012",
    barmode='stack',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Number of meteorites'),
    annotations=[
        # Source credit, anchored with xref/yref = 'paper'.
        dict(text='<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
             font=dict(color='grey', size=10),
             xref='paper',
             yref='paper',
             x=0,
             y=-0.2,
             showarrow=False),
    ],
)
fig = dict(data=traces, layout=layout)
pyo.iplot(fig)
meteorites = pd.read_csv(
    "http://richard-muir.com/data/public/csv/MeteoritesByContinent.csv",
    index_col=0,
)
meteorites.head()

# Firstly we need a list of the unique continents, which we'll use to
# calculate the proportions:
continents = meteorites['continent'].unique().tolist()
continents

# Reshape to one row per year and one column per continent.
meteorites = meteorites.pivot(index='year', columns='continent', values='count')
meteorites.head()

# Year/continent combinations missing from the data become 0.
meteorites = meteorites.fillna(0)
meteorites.head()

# Row-wise total using df.sum()
meteorites['total'] = meteorites.sum(axis=1)
meteorites.head()

# Loop through our list of continents, calculating the proportion of
# meteorites each year that fell in each continent:
for c in continents:
    meteorites[f"{c}_pc"] = meteorites[c].div(meteorites['total'])
meteorites.head()

# Order the proportion columns by their summed share, largest first.
pcContinents = [c + "_pc" for c in continents]
sortKeys = dict(meteorites[pcContinents].sum())
pcContinents = sorted(pcContinents, key=sortKeys.get, reverse=True)
pcContinents
# One bar trace per proportion column; the trailing "_pc" (3 characters)
# is stripped to recover the continent name for the legend.
traces = [
    dict(type='bar',
         name=column[:-3],
         x=meteorites.index,
         y=meteorites[column],
         opacity=0.7)
    for column in pcContinents
]
pyo.iplot(traces)
# Stacked chart: same traces, stacked, with the y-axis ticks and hover
# values formatted as percentages.
layout = dict(
    title="Proportion of meteorites found by continent, 2000 - 2012",
    barmode='stack',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Proportion of meteorites',
               tickformat='%',
               hoverformat='%'),
    annotations=[
        # Source credit, anchored with xref/yref = 'paper'.
        dict(text='<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
             font=dict(color='grey', size=10),
             xref='paper',
             yref='paper',
             x=0,
             y=-0.2,
             showarrow=False),
    ],
)
fig = dict(data=traces, layout=layout)
pyo.iplot(fig)
# Load the per-year counts by weight category.
sizes = pd.read_csv(
    "http://richard-muir.com/data/public/csv/MeteoriteLandingsbyWeightPerYear.csv",
    index_col=0,
)
sizes.head()

# Remember the category columns before any derived columns are added.
sizeStrings = list(sizes.columns)
sizeStrings

# Calculating the total and percentages:
sizes['total'] = sizes.sum(axis=1)
for s in sizeStrings:
    sizes[f"{s}_pc"] = sizes[s] / sizes['total']
sizes.head()
# Stacked proportion bars with a line overlay showing the share of
# meteorites in the 'less than 101g' weight category.
layout = {'title': "Proportion of meteorites found by continent, 2000 - 2012",
          'barmode': 'stack',
          'xaxis': {'title': 'Year'},
          'yaxis': {'title': 'Proportion of meteorites',
                    'tickformat': '%',
                    'hoverformat': '%'},
          # Source credit, anchored with xref/yref = 'paper'.
          'annotations': [{'text': '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
                           'font': {'color': 'grey',
                                    'size': 10},
                           'xref': 'paper',
                           'yref': 'paper',
                           'x': 0,
                           'y': -0.2,
                           'showarrow': False}]}
trace2 = {'type': 'scatter',
          'mode': 'lines+markers',
          'x': sizes.index,
          'y': sizes['less than 101g_pc'],
          'marker': {'color': '#333'},
          'name': 'Meteorite < 101g'}
# Adding 2nd trace to data.
# Build a new list rather than appending through fig['data']: that list
# would be the same object as `traces`, so an append would also grow
# `traces` and re-running this cell would pile up duplicate line traces.
fig = {'data': traces + [trace2], 'layout': layout}
pyo.iplot(fig)
# 3x3 grid holding three charts: a 2x2 cell top-left, a 1x2 column
# top-right, and a full-width strip along the bottom row.
fig = make_subplots(
    rows=3,
    cols=3,
    specs=[
        [dict(rowspan=2, colspan=2), None, dict(rowspan=2)],
        [None, None, None],
        [dict(colspan=3), None, None],
    ],
    subplot_titles=[
        "Types of meteorite by weight",
        "Number of meteorites by continent",
        "Weight categories of meteorite",
    ],
)
# Empty scatter trace added to the top-left cell.
fig.append_trace(dict(type='scatter'), row=1, col=1)
pyo.iplot(fig)
# Adding first chart: fetch an existing figure, flip each of its traces to
# horizontal by swapping x and y, and add them to the top-right cell.
stacked = py.get_figure("rmuir", 241)
for bar in stacked['data']:
    xVals, yVals = bar['y'], bar['x']
    bar.update(dict(orientation='h', x=xVals, y=yVals))
    fig.append_trace(bar, row=1, col=3)
pyo.iplot(fig)
# Styling the above chart: percentage ticks/hover on the second x-axis,
# stacked bars, and a taller figure.
fig['layout']['xaxis2'].update(dict(tickformat='%', hoverformat='%'))
fig['layout'].update(dict(barmode='stack', height=1000))
pyo.iplot(fig)
# Full code: build the subplot grid, add the horizontal stacked bars to the
# top-right cell, then style and draw.
fig = make_subplots(
    rows=3,
    cols=3,
    specs=[
        [dict(rowspan=2, colspan=2), None, dict(rowspan=2)],
        [None, None, None],
        [dict(colspan=3), None, None],
    ],
    subplot_titles=[
        "Types of meteorite by weight",
        "Number of meteorites by continent",
        "Weight categories of meteorite",
    ],
)

stacked = py.get_figure("rmuir", 241)
for bar in stacked['data']:
    # Swap the axes so the bars run horizontally.
    xVals, yVals = bar['y'], bar['x']
    bar.update(dict(orientation='h', x=xVals, y=yVals))
    fig.append_trace(bar, row=1, col=3)

fig['layout']['xaxis2'].update(dict(tickformat='%', hoverformat='%'))
fig['layout'].update(dict(barmode='stack', height=1000))
pyo.iplot(fig)
# How many meteorites were found each year in each weight category?
sizes = pd.read_csv(
    "http://richard-muir.com/data/public/csv/MeteoriteLandingsbyWeightPerYear.csv",
    index_col=0,
)
sizeStrings = list(sizes.columns)
sizes['total'] = sizes.sum(axis=1)
for s in sizeStrings:
    sizes[f"{s}_pc"] = sizes[s] / sizes['total']
sizes.head()

# One line per weight category, drawn in the bottom row of the grid.
for s in sizeStrings:
    fig.append_trace(dict(type='scatter',
                          mode='markers+lines',
                          x=sizes.index,
                          y=sizes[f"{s}_pc"],
                          name=s),
                     row=3, col=1)

fig['layout']['yaxis3'].update(dict(tickformat='%', hoverformat='%'))
pyo.iplot(fig)
# Meteorite mass by class, plotted as markers in the top-left cell and
# kept out of the legend.
typeWeight = pd.read_csv(
    "http://richard-muir.com/data/public/csv/MeteoriteLandingsbyWeightAndType.csv",
    index_col=0,
)
typeWeight.head()

fig.append_trace(dict(type='scatter',
                      mode="markers",
                      x=typeWeight['wideClass'],
                      y=typeWeight['mass (g)'],
                      showlegend=False),
                 row=1, col=1)
fig['layout']['yaxis1'].update(dict(title='Weight (g)'))
pyo.iplot(fig)
# Rebuild the grid, this time giving each stacked-bar trace its own colour
# and putting them all in one legend group.
fig = make_subplots(
    rows=3,
    cols=3,
    specs=[
        [dict(rowspan=2, colspan=2), None, dict(rowspan=2)],
        [None, None, None],
        [dict(colspan=3), None, None],
    ],
    subplot_titles=[
        "Types of meteorite by weight",
        "Number of meteorites by continent",
        "Weight categories of meteorite",
    ],
)

stackedBarColours = [
    "rgb(95,134,183)", "rgb(177,200,235)", "rgb(1,54,136)",
    "rgb(237,180,236)", "rgb(104,12,113)", "rgb(241,82,182)",
    "rgb(101,230,249)",
]

stacked = py.get_figure("rmuir", 241)
for i, bar in enumerate(stacked['data']):
    # Swap the axes so the bars run horizontally; colour is picked by
    # the trace's position in the fetched figure.
    xVals, yVals = bar['y'], bar['x']
    bar.update(dict(orientation='h',
                    x=xVals,
                    y=yVals,
                    marker=dict(color=stackedBarColours[i]),
                    legendgroup='continents'))
    fig.append_trace(bar, row=1, col=3)

fig['layout']['xaxis2'].update(dict(tickformat='%', hoverformat='%'))
fig['layout'].update(dict(barmode='stack', height=1000))
pyo.iplot(fig)
# Weight-category lines for the bottom row, each with its own colour and
# all sharing one legend group.
sizes = pd.read_csv(
    "http://richard-muir.com/data/public/csv/MeteoriteLandingsbyWeightPerYear.csv",
    index_col=0,
)
sizeStrings = list(sizes.columns)
sizes['total'] = sizes.sum(axis=1)
for s in sizeStrings:
    sizes[f"{s}_pc"] = sizes[s] / sizes['total']

sizeColours = [
    "rgb(194,87,211)",
    "rgb(77,87,168)",
    "rgb(223,184,245)",
    "rgb(88,38,166)",
]

for i, s in enumerate(sizeStrings):
    fig.append_trace(dict(type='scatter',
                          mode='markers+lines',
                          x=sizes.index,
                          y=sizes[f"{s}_pc"],
                          name=s,
                          marker=dict(color=sizeColours[i]),
                          legendgroup='weightsbyyear'),
                     row=3, col=1)

fig['layout']['yaxis3'].update(dict(tickformat='%', hoverformat='%'))
pyo.iplot(fig)
# Mass-by-class markers for the top-left cell, now with hover limited to
# x and y and a semi-transparent marker colour.
fig.append_trace(dict(type='scatter',
                      mode="markers",
                      x=typeWeight['wideClass'],
                      y=typeWeight['mass (g)'],
                      showlegend=False,
                      #NEW CODE GOES HERE:
                      hoverinfo='x+y',
                      marker=dict(color="rgba(107,20,214, 0.5)")),
                 row=1, col=1)
fig['layout']['yaxis1'].update(dict(title='Weight (g)'))
pyo.iplot(fig)
# Space the legend groups apart and add a source credit to the figure.
fig['layout'].update({'legend': {'tracegroupgap': 600}})
# Rebuild the annotation sequence and assign it back instead of calling
# .append() on it: plotly graph objects hand back `annotations` as a tuple,
# which has no append(), while list(...) + [...] works whether the existing
# value is a list or a tuple. The existing annotations are kept in front.
fig['layout']['annotations'] = list(fig['layout']['annotations']) + [
    {'font': {'color': 'grey', 'size': 10},
     'showarrow': False,
     'text': '<i>Source: https://data.nasa.gov/view/ak9y-cwf9</i>',
     'x': 0.1,
     'xref': 'paper',
     'y': -0.1,
     'yref': 'paper'},
]
pyo.iplot(fig)